import pandas as pd
import altair as alt
# Lift Altair's 5_000-row limit so the full datasets can be charted.
alt.data_transformers.disable_max_rows()

# Widen pandas' display limits so large frames are shown without truncation.
for option_name, option_value in (
    ("display.max_rows", 500),       # display at most 500 rows
    ("display.max_columns", 500),    # display at most 500 columns
    ("display.max_colwidth", 1000),  # expand the maximum column text width
):
    pd.set_option(option_name, option_value)
# Remote CSV sources; set these paths accordingly (e.g. to local copies).
FINDINGS_CSV_URL = "https://raw.githubusercontent.com/code-423n4/code423n4.com/main/_data/findings/findings.csv"
CONTESTS_CSV_URL = "https://raw.githubusercontent.com/code-423n4/code423n4.com/main/_data/contests/contests.csv"
SUBMISSIONS_CSV_URL = "https://raw.githubusercontent.com/Krow10/code4rena-scraper/master/github_code4rena.csv"

findings_data = pd.read_csv(FINDINGS_CSV_URL)
contests_data = pd.read_csv(CONTESTS_CSV_URL)
submissions_data = pd.read_csv(SUBMISSIONS_CSV_URL)

# Expand the ';'-separated `tags` column into one indicator column per tag and
# attach those columns to the submissions, aligned on the row index.
tag_indicators = submissions_data["tags"].str.get_dummies(sep=";")
df = pd.merge(
    submissions_data,
    tag_indicators,
    how="outer",
    left_index=True,
    right_index=True,
)

# Remove incorrect early contest ids (keep only ids >= 11).
df = df[df["contest"] >= 11]
df["issueCreation"] = pd.to_datetime(df["issueCreation"])

# Expose the contest id under the same `contest` key the other frames use,
# so the frames can be merged on it.
contests_data = contests_data.rename(columns={"contestid": "contest"})
# Build `df2`: the findings table with per-contest aggregates merged back onto
# every row, plus the month in which the contest ended.
#
# Columns added:
#   total            - per contest, sum over (contest, pie) groups of the
#                      number of non-null `split` values
#   duplicates       - per contest, sum over (contest, pie) groups of
#                      (group size - 1)
#   totalAwardUSD    - per contest, sum of `awardUSD`
#   duplicates_award - per contest, sum over (contest, pie) groups of
#                      (group size - 1) * min(`awardUSD` in the group)
#   end_time         - contest end month as a "YYYY-MM" string
# NOTE(review): presumably each (contest, pie) group is one unique issue and
# its extra rows are the duplicate reports -- confirm against the findings
# schema.
df2 = findings_data.copy()

findings_per_contest = (
    df2.groupby(["contest", "pie"])["split"].count()
    .groupby(level="contest").sum()
)
# Both sides carry a `split` column, so the merge suffixes them _x/_y.
df2 = pd.merge(df2, findings_per_contest, on="contest")
df2 = df2.rename(columns={"split_x": "split", "split_y": "total"})

duplicates_per_contest = (
    (df2.groupby(["contest", "pie"])["split"].count() - 1)
    .groupby(level="contest").sum()
)
df2 = pd.merge(df2, duplicates_per_contest, on="contest")
df2 = df2.rename(columns={"split_x": "split", "split_y": "duplicates"})

award_per_contest = df2.groupby("contest")["awardUSD"].sum()
df2 = pd.merge(df2, award_per_contest, on="contest")
df2 = df2.rename(columns={"awardUSD_x": "awardUSD", "awardUSD_y": "totalAwardUSD"})

per_pie = df2.groupby(["contest", "pie"])
duplicates_award_per_contest = (
    ((per_pie["split"].count() - 1) * per_pie["awardUSD"].min())
    # Name the product explicitly instead of relying on the unnamed result
    # becoming column `0` after reset_index().
    .rename("duplicates_award")
    .groupby(level="contest").sum()
)
df2 = pd.merge(df2, duplicates_award_per_contest, on="contest")

df2 = pd.merge(df2, contests_data[["contest", "end_time"]], on="contest")
# Strip any timezone (keeping the wall-clock time) *before* converting to a
# monthly period: calling to_period() on tz-aware data emits "UserWarning:
# Converting to PeriodArray/Index representation will drop timezone
# information". tz_localize(None) leaves tz-naive data unchanged.
df2["end_time"] = (
    pd.to_datetime(df2["end_time"])
    .dt.tz_localize(None)
    .dt.to_period("M")
    .astype(str)
)
# [captured notebook stderr from a previous run -- not code]
# /tmp/ipykernel_1870/2846388186.py:10: UserWarning: Converting to PeriodArray/Index representation will drop timezone information.
#   df2['end_time'] = pd.to_datetime(df2.end_time).dt.to_period("M").astype({"end_time": str})
# Palette shared by the charts: bars/points inside vs. outside the brushed
# period, and the colour of the "mean" rules.
unselected_color = '#012749'
mean_color = '#fa4d56'
selected_color = '#82cfff'

# Interval (brush) selection restricted to the x encoding; the same selection
# object is attached to several charts and also filters the mean rules.
# NOTE(review): `empty='all'` presumably means "no brush = everything counts
# as selected" -- confirm against the Altair version in use.
select_date = alt.selection_interval(encodings=['x'], empty='all')

# Per-row ratio of a contest's duplicate count to its total count.
base = alt.Chart(df2, width=800, height=200).transform_calculate(
    duplicate_ratio='datum.duplicates/datum.total'
)

# NOTE(review): `duplicate_ratio` is not aggregated here, so one bar is drawn
# per row of `df2` at its contest's end month -- confirm this overplotting is
# intended.
bars = base.mark_bar(
    size=10
).encode(
    x=alt.X('end_time:T', title=""),
    y=alt.Y('duplicate_ratio:Q', axis=alt.Axis(format='%'), title=""),
    color=alt.condition(
        select_date,
        alt.value(selected_color),
        alt.value(unselected_color),
    ),
).add_selection(
    select_date
)

# Horizontal rule at the mean ratio of the rows inside the brushed period.
# The rule's colour comes solely from the `color` encoding; the previous
# `mark_rule(color='red')` was overridden by that encoding and never applied.
mean_dup_rule = base.mark_rule().encode(
    y='mean(duplicate_ratio):Q',
    color=alt.value(mean_color),
).transform_filter(
    select_date
)
# Rebinds `base` for the second chart: per-row ratio of the contest's
# duplicate award amount to its total award amount.
base = alt.Chart(df2, width=800, height=200).transform_calculate(
    duplicate_money_ratio='datum.duplicates_award/datum.totalAwardUSD'
)

# Line (with point markers) of the mean money ratio at each `end_time` value.
line = base.mark_line(
    point=True
).encode(
    x=alt.X('end_time:T', title=""),
    y=alt.Y('mean(duplicate_money_ratio):Q', axis=alt.Axis(format='%'), title=""),
    color=alt.condition(
        select_date,
        alt.value(selected_color),
        alt.value(unselected_color),
    ),
).add_selection(
    select_date
)

# Horizontal rule at the mean money ratio of the rows inside the brushed
# period. The colour is set only through the encoding; the previous
# `mark_rule(color='red')` was overridden by it and never applied.
mean_money_rule = base.mark_rule().encode(
    y='mean(duplicate_money_ratio):Q',
    color=alt.value(mean_color),
).transform_filter(
    select_date
)
# One row per distinct (end_time, contest) pair, so counting `contest` gives
# the number of contests at each end_time value.
contests_by_month = df2[["end_time", "contest"]].drop_duplicates()

slider_x = alt.X(
    'end_time:T',
    title="Drag the mouse to select a period. Click outside of the selection to reset.",
    axis=alt.Axis(grid=False),
)
slider_y = alt.Y('count(contest):Q', title="", axis=None)
slider_color = alt.condition(
    select_date,
    alt.value(selected_color),
    alt.value(unselected_color),
)

# Short strip chart that carries the shared brush selection.
date_slider = (
    alt.Chart(contests_by_month)
    .mark_bar(size=35)
    .encode(x=slider_x, y=slider_y, color=slider_color)
    .properties(width=800, height=50)
    .add_selection(select_date)
)
# Final notebook expression: stack the two layered charts above the date
# slider and hide the view border.
alt.vconcat(
    alt.layer(bars, mean_dup_rule).properties(
        title="Ratio of duplicate findings to total"
    ),
    alt.layer(line, mean_money_rule).properties(
        title="Percentage of the prize money captured by duplicate findings"
    ),
    date_slider,
).configure_view(
    strokeWidth=0
)
# [captured notebook stderr from a previous run -- not code]
# /opt/hostedtoolcache/Python/3.11.1/x64/lib/python3.11/site-packages/altair/utils/core.py:317: FutureWarning: iteritems is deprecated and will be removed in a future version. Use .items instead.
#   for col_name, dtype in df.dtypes.iteritems():